library(tidyverse)
library(broom)
library(lubridate)
library(RColorBrewer)
library(leaflet)
library(crosstalk)
library(htmltools)
library(sf)
library(mapdata)
library(tigris)
Our data came split into separate files by year. Here, we read each of these files and remove the columns we do not plan on using. These chunks are set not to run because the CSV files are too large to easily push to GitHub, and Posit would frequently crash trying to read them.
# The ten per-year ingest chunks were identical except for the file name, so
# they are collapsed here into one helper plus a loop.

# Release-date stamp embedded in each year's file name (the files were
# published in separate batches, so the stamp varies by year).
storm_file_stamps <- c(
  "2010" = "20250520", "2011" = "20250520", "2012" = "20250520",
  "2013" = "20250520", "2014" = "20250520", "2015" = "20250818",
  "2016" = "20250818", "2017" = "20250520", "2018" = "20250520",
  "2019" = "20250520", "2020" = "20250702"
)

# Event types kept for the analysis.
major_event_types <- c(
  "Avalanche", "Blizzard", "Drought", "Flood", "Flash Flood",
  "Excessive Heat", "Tornado", "Tropical Storm", "Tsunami", "Wildfire"
)

# Columns we do not plan on using.
unused_columns <- c(
  "CZ_TYPE", "CZ_NAME", "TOR_F_SCALE", "TOR_LENGTH", "TOR_WIDTH",
  "TOR_OTHER_WFO", "TOR_OTHER_CZ_STATE", "TOR_OTHER_CZ_FIPS",
  "TOR_OTHER_CZ_NAME", "BEGIN_RANGE", "BEGIN_AZIMUTH", "BEGIN_LOCATION",
  "END_RANGE", "END_AZIMUTH", "END_LOCATION", "EPISODE_NARRATIVE",
  "EVENT_NARRATIVE", "DATA_SOURCE", "WFO", "YEAR"
)

# Read one year's raw file, keep only the major event types, and drop the
# unused columns. Returns a tibble.
read_storm_year <- function(year) {
  path <- sprintf(
    "../data/ignore/StormEvents_details-ftp_v1.0_d%s_c%s.csv",
    year, storm_file_stamps[[as.character(year)]]
  )
  read_csv(path) |>
    filter(EVENT_TYPE %in% major_event_types) |>
    select(-all_of(unused_columns))
}

# Still create one StormEvents_<year> object per year so the combining
# chunk below keeps working unchanged.
for (yr in 2010:2020) {
  assign(paste0("StormEvents_", yr), read_storm_year(yr))
}
Next, we took all of these individual CSVs and combined them into one. This chunk is also set not to run, since the original CSVs are not loaded.
# Stack the eleven per-year tables into one data frame and cache it to disk
# so later chunks can reload it without touching the raw files.
storms <- bind_rows(mget(paste0("StormEvents_", 2010:2020)))
write_csv(storms, file = "../data/storms2.csv")
# Reload the combined CSV from disk: the ingest chunks above are eval=FALSE
# to avoid crashing Posit, so `storms` must be rebuilt from the cached file
# on every run.
storms <- read_csv("../data/storms2.csv")
To put the dates in a more usable format, we split the combined year-month column into two separate columns: year and month.
# Split BEGIN_YEARMONTH after its fourth character into BEGIN_YEAR and
# BEGIN_MONTH; convert = TRUE turns both pieces back into numeric columns.
storms <- separate(
  storms,
  col = BEGIN_YEARMONTH,
  into = c("BEGIN_YEAR", "BEGIN_MONTH"),
  sep = 4,
  convert = TRUE
)
We also change the date format into one that can be graphed with ggplot without any gaps.
# Derive a plain Date column from the event's begin timestamp so dates can
# be plotted on a continuous axis without gaps.
storms$BEGIN_DATE <- as_date(storms$BEGIN_DATE_TIME)
We create a new dataframe for this plot, and count the number of storms per year.
# Count the number of events recorded in each year.
storms_by_year <- storms |>
  count(BEGIN_YEAR, name = "n_events")

# Line-plus-point chart of yearly event counts.
ggplot(storms_by_year, aes(BEGIN_YEAR, n_events)) +
  geom_line(linewidth = 1) +
  geom_point(size = 2) +
  scale_x_continuous(breaks = seq(2010, 2020, by = 2)) +
  theme_minimal() +
  labs(
    title = "Frequency of Extreme Weather Events (2010–2020)",
    subtitle = "Filtered to major natural disasters",
    x = "Year",
    y = "Number of Events"
  )
#ggsave("storms_by_year.png")
First, we sum the indirect and direct injuries and deaths into one column, casualties, and merge that data frame with the storms by year dataset.
# Yearly event counts (recomputed here so this chunk is self-contained).
storms_by_year <- storms |>
  count(BEGIN_YEAR, name = "n_events")

# Total casualties (direct + indirect injuries and deaths) per year.
# Plain column addition is already vectorized; the previous
# rowwise()/sum() version computed the same totals one row at a time.
storm_casualties <- storms |>
  mutate(CASUALTIES = INJURIES_DIRECT + INJURIES_INDIRECT +
           DEATHS_DIRECT + DEATHS_INDIRECT) |>
  group_by(BEGIN_YEAR) |>
  summarise(casualties_year = sum(CASUALTIES))

# Explicit join key (silences the "Joining with ..." message).
casualties_storms <- left_join(storm_casualties, storms_by_year,
                               by = "BEGIN_YEAR")
To add in the total damage cost, we must reformat the way the cost is written. In the dataset, cost is abbreviated with K for thousand, and values with no damage are a mix between 0.00K and NA. Here, we rewrite the cost to a usable format and set all NA and 0.00K values to 0. Then, we join the casualty and cost dataframes.
# Convert a NOAA damage string (e.g. "25.00K", "1.5M", NA) to a number in
# units of $100k, matching the "Damages (in $100k)" axis label used later.
# The previous version only recognized the "K" (thousand) suffix, so any
# "M" (million) or "B" (billion) suffixed values — which do occur in the
# Storm Events files — were silently treated as zero damage.
parse_damage <- function(x) {
  x <- toupper(ifelse(is.na(x), "0", as.character(x)))  # NA means no damage
  amount <- suppressWarnings(as.numeric(gsub("[KMB]", "", x)))
  amount[is.na(amount)] <- 0
  multiplier <- rep(0, length(x))       # unsuffixed values count as zero,
  multiplier[grepl("K", x)] <- 1 / 100  # matching the original behavior
  multiplier[grepl("M", x)] <- 10       # $1M = 10 x $100k
  multiplier[grepl("B", x)] <- 10000    # $1B = 10000 x $100k
  amount * multiplier
}

# Total damage cost (property + crops) per year, in $100k.
storm_damages <- storms |>
  mutate(DAMAGES = parse_damage(DAMAGE_PROPERTY) +
           parse_damage(DAMAGE_CROPS)) |>
  group_by(BEGIN_YEAR) |>
  summarise(damage_year = sum(DAMAGES))

# Explicit join key (silences the "Joining with ..." message).
casualties_storms <- left_join(casualties_storms, storm_damages,
                               by = "BEGIN_YEAR")
# Overlay the three yearly series on one plot. The color aesthetic is mapped
# to placeholder keys ("orange"/"red"/"green"); scale_color_manual then
# translates those keys into the actual colors and legend labels.
# Fix: legend label typo "Injures" -> "Injuries".
ggplot(casualties_storms, aes(BEGIN_YEAR)) +
  geom_line(aes(y = n_events, color = "orange")) +
  geom_line(aes(y = casualties_year, color = "red")) +
  geom_line(aes(y = damage_year, color = "green")) +
  geom_point(aes(y = n_events, color = "orange")) +
  geom_point(aes(y = casualties_year, color = "red")) +
  geom_point(aes(y = damage_year, color = "green")) +
  labs(
    title = "Frequency of extreme weather events, cost, and injuries (2010–2020)",
    subtitle = "Filtered to major natural disasters",
    x = "Year",
    y = "Frequency",
    color = ""
  ) +
  theme_minimal() +
  scale_color_manual(
    values = c("orange" = "black", "red" = "purple", "green" = "#00cd00"),
    labels = c("orange" = "# of storms",
               "red" = "Injuries + Fatalities",
               "green" = "Damages (in $100k)")
  ) +
  scale_x_continuous(breaks = seq(2010, 2020, by = 2))
#ggsave("storms_injuries_cost.png")
Here, we count the number of events per year for each storm type, keeping only the more common event types.
# Per-year, per-type event counts, restricted to the listed event types.
storms_by_type_year <- storms |>
  filter(EVENT_TYPE %in% c("Blizzard", "Drought", "Flood", "Flash Flood",
                           "Excessive Heat", "Tornado", "Tropical Storm",
                           "Wildfire")) |>
  count(BEGIN_YEAR, EVENT_TYPE, name = "n_events")

# Stacked area chart: each band is one storm type's yearly contribution.
ggplot(storms_by_type_year,
       aes(x = BEGIN_YEAR, y = n_events, fill = EVENT_TYPE)) +
  geom_area(alpha = 0.8, position = "stack") +
  scale_x_continuous(breaks = seq(2010, 2020, by = 2)) +
  scale_fill_brewer(palette = "Set2") +
  theme_minimal() +
  labs(
    title = "Extreme Weather Events by Storm Type (2010–2020)",
    subtitle = "Stacked area shows how different storm types contribute to total events",
    x = "Year",
    y = "Number of Events",
    fill = "Storm type"
  )
#ggsave("storms_by_type.png")
#crosstalk code from https://stackoverflow.com/questions/62849300/r-leaflet-add-a-range-slider-to-filter-markers-without-shiny
# Tornado events with a per-row id, shared via crosstalk so the year
# selector can filter the leaflet markers client-side (no Shiny needed).
map_storms <- storms |>
  filter(EVENT_TYPE == "Tornado") |>
  mutate(id = row_number())

shared_data <- SharedData$new(map_storms, key = ~id, group = "tornado_events")

#slider <- filter_slider("date", "Year", shared_data, ~BEGIN_YEAR, step = 1, ticks = FALSE, width = "90%", dragRange = FALSE, sep = NULL)
slider <- filter_select("year_select", "Select Year", shared_data,
                        ~BEGIN_YEAR, allLevels = TRUE, multiple = FALSE)

# One small red circle per tornado at its begin coordinates.
leaflet_map <- leaflet(shared_data) |>
  addProviderTiles(provider = providers$OpenStreetMap) |>
  addCircleMarkers(
    lng = ~BEGIN_LON,
    lat = ~BEGIN_LAT,
    popup = ~EVENT_TYPE,
    color = "red",
    radius = 1,
    opacity = 0.6,
    group = ~EVENT_TYPE
  )

# Render the year selector above the map.
tagList(
  slider,
  leaflet_map
)
First, we filter to just tornadoes, then select only the relevant columns. Finally, we combine two location identifiers, STATE_FIPS and CZ_FIPS, which are numeric identifiers for each state and county. The map data has a column called GEOID, which is the STATE_FIPS followed by the CZ_FIPS. Combining them here allows us to pinpoint exactly which county each event happened in.
#Take storms, make just tornados, create GEOID from FIPS
# Tornado events with a county GEOID built from the FIPS codes. A GEOID is
# five characters: a zero-padded 2-digit state FIPS followed by a
# zero-padded 3-digit county FIPS. The previous version did not pad
# STATE_FIPS, so single-digit states (FIPS 1-9) produced 4-character
# GEOIDs that could never match the 5-character GEOID column from tigris.
T_coords <- storms |>
  filter(EVENT_TYPE == "Tornado") |>
  select(BEGIN_LAT, BEGIN_LON, STATE, STATE_FIPS, CZ_FIPS) |>
  mutate(
    ST_FIPS = str_pad(STATE_FIPS, width = 2, side = "left", pad = "0"),
    CTY_FIPS = str_pad(CZ_FIPS, width = 3, side = "left", pad = "0"),
    GEOID = paste0(ST_FIPS, CTY_FIPS)
  )

# Total tornados per county
tornados_per <- T_coords |>
  count(GEOID, name = "T_count")
#create the map
# County outlines for the ggplot base map.
map <- map_data("county") |>
  rename(NAME = subregion)

# County boundaries + GEOIDs from the tigris package; drop Alaska ("02")
# and Hawaii ("15"). STATEFP is a zero-padded character column, so the
# comparison values must be the strings "02"/"15" — the previous numeric
# c(2, 15) coerced to "2"/"15" and matched nothing, leaving AK/HI in.
geoid_df <- counties(state = NULL, cb = TRUE) |>
  filter(!(STATEFP %in% c("02", "15")))
## Retrieving data for the year 2024
## (tigris download progress-bar output omitted)
#select(NAME, GEOID, -geometry)|>
#mutate(NAME = tolower(NAME))
#from https://jtr13.github.io/cc19/different-ways-of-plotting-u-s-map-in-r.html
# Draw the county polygons, filled by region, with the fill legend hidden.
# guides(fill = FALSE) was deprecated in ggplot2 3.3.4 (it raised the
# warning captured in the rendered output); "none" is the supported form.
ggplot(data = map, aes(x = long, y = lat, fill = region, group = group)) +
  geom_polygon(color = "white", linewidth = 0.1) +
  guides(fill = "none")